Exploratory Analysis of Demographic Characteristics Where Facilities are Located
# needed packages
library(tidyverse)
# to retrieve census data
library(censusapi)
# to read boundaries file
library(sf)# read prison boundaries file from Homeland Infrastructure Foundation-Level Data Platform
# downloaded in geodatabase form and unzipped, read as sf object
# link: https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::prison-boundaries/about
prison_boundaries <- st_read("./1075f3ca-0050-4264-82e2-079a2daf2ec5.gdb", quiet = TRUE)
# read results that contain block assignment for each facility saved in csv
prison_boundaries_blocks <- read_csv("./prison_blocks.csv")
# for prison boundaries file, get list of all state county combinations represented, including the
# codes for the corresponding states and counties
# resulting data frame has 4 columns -->
# COUNTYFIPS -- 5 digit code consisting of state code and county code
# STATE -- 2 chracter digit state abbreviation
# STATEFIPS -- 2 digit state FIPS
# COUNTYCODE -- 3 digit county code
state_county_list <- st_drop_geometry(prison_boundaries) %>%
group_by(COUNTYFIPS, STATE) %>%
summarize() %>%
select( COUNTYFIPS, STATE) %>%
mutate(STATE = as.character(STATE),
COUNTYFIPS = as.character(COUNTYFIPS),
STATEFIPS = substr(COUNTYFIPS, 1, 2),
COUNTYCODE= substr(COUNTYFIPS,
nchar(COUNTYFIPS) - 2,
nchar(COUNTYFIPS)))
# remove the NOT AVAILABLE entry
state_county_list_filtered <- state_county_list[state_county_list$COUNTYFIPS != "NOT AVAILABLE",]
# get all state fips codes in prison boundaries data
state_codes <- state_county_list_filtered %>%
group_by(STATEFIPS) %>%
summarize() %>%
pull(STATEFIPS)Analysis at the Block Group Level
# can use this function to see what variables are available for the given dataset
# listCensusMetadata(name = "pdb/blockgroup", vintage = 2021, type = "variables", group = NULL)
# get census data for the variables listed at the blockgroup level; block level not available
get_census_data_safe <- function(state_code) {
region_for_state = paste0("state:", state_code, "+county:*+tract:*")
result <- tryCatch(
{
getCensus(name = "pdb/blockgroup",
vintage = 2021,
key = "fbbc8b0d3e53089da7cabc380628d6d46ae00444",
vars = c("Block_group",
"County",
"URBANIZED_AREA_POP_CEN_2010",
"Tot_Population_CEN_2010",
"Tot_Population_ACS_15_19",
"Males_ACS_15_19",
"Females_ACS_15_19",
"Median_Age_ACS_15_19",
"Hispanic_ACS_15_19",
"NH_White_alone_ACS_15_19",
"NH_Blk_alone_ACS_15_19",
"NH_AIAN_alone_ACS_15_19",
"NH_Asian_alone_ACS_15_19",
"NH_NHOPI_alone_ACS_15_19",
"NH_SOR_alone_ACS_15_19",
"Pov_Univ_ACS_15_19",
"Prs_Blw_Pov_Lev_ACS_15_19",
"No_Health_Ins_ACS_15_19",
"Tot_Occp_Units_ACS_15_19",
"Aggregate_HH_INC_ACS_15_19",
"Med_HHD_Inc_BG_ACS_15_19",
"Tot_Prns_in_HHD_ACS_15_19",
"Renter_Occp_HU_ACS_15_19",
"Med_House_Value_BG_ACS_15_19",
"pct_ENG_VW_ACS_15_19",
"pct_URBANIZED_AREA_POP_CEN_2010",
"pct_RURAL_POP_CEN_2010",
"pct_Inst_GQ_CEN_2010",
"pct_Non_Inst_GQ_CEN_2010",
"pct_URBAN_CLUSTER_POP_CEN_2010",
"pct_PUB_ASST_INC_ACS_15_19"
),
regionin = region_for_state,
region = "block group:*")
} ,
error=function(e) {
return()
}
)
return(result)
}
# get block group level data for every state in state_codes - only run when making changes to the variables included in the function above
# blockgroup_level_data <- map_dfr(state_codes, ~get_census_data_safe(.x))
# remove of redundant columns
# blockgroup_level_data <- blockgroup_level_data %>%
# select(-Block_group, County)
# write_csv(blockgroup_level_data, "./blockgroup_level_data.csv")
blockgroup_level_data <- read_csv("./blockgroup_level_data.csv")
# get block group number, which is the first digit of the block number
prison_boundaries_blocks <- prison_boundaries_blocks %>%
mutate(block_group = as.numeric(substr(BLOCKCE10, 1,1)))
# join prison boundaries data to the census data
prison_boundaries_block_census <- left_join(prison_boundaries_blocks, blockgroup_level_data,
by = c("STATEFP10" = "state",
"COUNTYFP10" = "county",
"TRACTCE10" = "tract",
"block_group" = "block_group"))
# all variables in the dataset
all_vars <- prison_boundaries_block_census %>% colnames()Variable definitions are from the documentation for the 2021 Planning Database.
First, we can look at the distributions of the median age in block groups where facilities are located.
Distribution of Median Age
library(forcats)
#### FOR COMPARISON: distribution of median age for all block groups across the U.S.
blockgroup_level_data %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Across the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(0, 100))# Distribution of Median Age in Block Groups Where Facilities are Located using Median_Age_ACS_15_19 variable
###### distribution of median age by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10, block_group, Median_Age_ACS_15_19) %>%
summarize() %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(0,100))###### distribution of median age by facility
prison_boundaries_block_census %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# median age distribution faceted by facility type
##### by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10, block_group, Median_Age_ACS_15_19, TYPE) %>%
summarize() %>%
group_by(TYPE) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
ungroup() %>%
select(Median_Age_ACS_15_19, TYPE, Median) %>%
mutate(TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~TYPE, scales = "free_y") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))##### by facility
prison_boundaries_block_census %>%
group_by(TYPE) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
select(Median_Age_ACS_15_19, TYPE, Median) %>%
mutate(TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~TYPE, scales = "free_y") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# median age distribution faceted by facility security level
##### by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10,
block_group, Median_Age_ACS_15_19,
SECURELVL) %>%
summarize() %>%
group_by(SECURELVL) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
ungroup() %>%
select(Median_Age_ACS_15_19, SECURELVL, Median) %>%
mutate(SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~SECURELVL, scales = "free_y") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))##### by facility
prison_boundaries_block_census %>%
group_by(SECURELVL) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
select(Median_Age_ACS_15_19, SECURELVL, Median) %>%
mutate(SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~SECURELVL, scales = "free_y") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of the Percentage Urbanized
As defined in the documentation, the variable definitions are as follows:
*pct_URBANIZED_AREA_POP_CEN_2010: “percentage of the 2010 Census total population that lives in a densely settled area containing 50,000 or more people”
*pct_URBAN_CLUSTER_POP_CEN_2010: “percentage of the 2010 Census total population that lives in a densely settled area containing 2,500 to 49,999 people”
*pct_RURAL_POP_CEN_2010: “percentage of the 2010 Census total population that lives outside of an urbanized area or urban cluster”
#### FOR COMPARISON -- distribution of percentage urbanized across the full U.S.
blockgroup_level_data %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_URBANIZED_AREA_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_URBANIZED_AREA_POP_CEN_2010 )) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage Urbanized",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Urbanized for Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) # distribution of percentage urbanized
# by facility
prison_boundaries_block_census %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_URBANIZED_AREA_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_URBANIZED_AREA_POP_CEN_2010 )) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage Urbanized",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Urbanized Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
ylim(0, 4100)# how many block groups with multiple facilities
#prison_boundaries_block_census %>%
# group_by(STATE, COUNTY, TRACTCE10, block_group) %>%
# summarize(n = n()) %>% filter(n > 1 ) %>% nrow()
#### FOR COMPARISON -- distribution of percentage rural across the U.S.
blockgroup_level_data %>%
mutate(Median = median(pct_RURAL_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_RURAL_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_RURAL_POP_CEN_2010 )) +
geom_histogram() +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage Rural",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Rural in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) # distribution
# distribution of percentage rural
prison_boundaries_block_census %>%
mutate(Median = median(pct_RURAL_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_RURAL_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_RURAL_POP_CEN_2010 )) +
geom_histogram() +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage Rural",
subtitle = "Mean in Blue, Median in Red\n2010 Census data ",
title = "Distribution of the Percentage Rural Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
ylim(0, 4100)# for making tables
library(kableExtra)
# table of how many block groups had > 50% rural and how many had < 50% rural
prison_boundaries_block_census %>%
select(pct_URBANIZED_AREA_POP_CEN_2010,
pct_RURAL_POP_CEN_2010,
pct_URBAN_CLUSTER_POP_CEN_2010) %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50, "Rural", "Urban")) %>%
group_by(Density) %>%
summarize(n =n()) %>%
mutate(Density = replace_na(Density, "Missing"),
total = sum(n),
Percentage = (n / total)*100,
Percentage = paste0(as.character(round(Percentage, 2)), '%')) %>%
select(Density, Percentage) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b>Percentage of Facilities in a Block Group that was Mostly Rural or Mostly Urban</b></span><span style = 'font-size: 1.5vw; color:black;'><br>Rural block groups are defined here as those where greater than 50% of the 2010 Census total\npopulation for that block group <br> was recorded to live outside of an urbanized area or urban cluster<br></span><i><span style = 'font-size: 1vw'>Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = T) %>%
row_spec(0, background = "#D9DFEE") | Density | Percentage |
|---|---|
| Rural | 31.12% |
| Urban | 68.25% |
| Missing | 0.64% |
# footnote("Rural block groups are defined here as those where greater than 50% of the 2010 Census total\npopulation for that block group was recorded to live outside of an urbanized area or urban cluster.")
# to add tab use  
### FOR COMPARISON -- table with mean and median percentages urbanized, urban cluster, and rural in the U.S.
blockgroup_level_data %>%
summarize(Urban = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
`Urban Cluster` = mean(pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
Rural = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
pivot_longer(cols = 1:3, names_to = "Population Density Category",
values_to = "Mean Percentage") %>%
cbind("Median Percentage" = c(
median(blockgroup_level_data$pct_URBANIZED_AREA_POP_CEN_2010,
na.rm = TRUE),
median(blockgroup_level_data$pct_URBAN_CLUSTER_POP_CEN_2010,
na.rm = TRUE),
median(blockgroup_level_data$pct_RURAL_POP_CEN_2010, na.rm = TRUE))) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b>For Block Groups in the United States: Mean and Median Percentages in Urbanized, Urban Cluster, and Rural Areas</b></span><span style = 'float:center'><i><span style = 'font-size: 1vw'><br> Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = F) %>%
row_spec(0, background = "#D9DFEE") | Population Density Category | Mean Percentage | Median Percentage |
|---|---|---|
| Urban | 69.00312 | 100 |
| Urban Cluster | 10.30077 | 0 |
| Rural | 20.69611 | 0 |
# table with mean and median percentages urbanized, urban cluster, and rural
prison_boundaries_block_census %>%
summarize(Urban = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
`Urban Cluster` = mean(pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
Rural = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
pivot_longer(cols = 1:3, names_to = "Population Density Category",
values_to = "Mean Percentage") %>%
cbind("Median Percentage" = c(
median(prison_boundaries_block_census$pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
median(prison_boundaries_block_census$pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
median(prison_boundaries_block_census$pct_RURAL_POP_CEN_2010, na.rm = TRUE))) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b> For Block Groups Where Facilities are Located: Mean and Median Percentages in Urbanized, Urban Cluster, and Rural Areas</b></span><span style = 'float:center'><i><span style = 'font-size: 1vw'><br> Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = F) %>%
row_spec(0, background = "#D9DFEE") | Population Density Category | Mean Percentage | Median Percentage |
|---|---|---|
| Urban | 36.63423 | 0.0 |
| Urban Cluster | 29.42237 | 0.0 |
| Rural | 33.94341 | 7.6 |
We can also see how the distribution of age varies depending on whether blocks are primarily urban or rural, among different facility types, and among different facility security levels.
###### distribution of median age in rural versus urban settings, where rural blocks are defined as those
# where more than 50% of the population is rural
# mean of the distribution is higher in rural areas, a trend that has been reported elsewhere
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, TYPE) %>%
mutate(Mean = mean(Median_Age_ACS_15_19,
na.rm =TRUE),
Median = median(Median_Age_ACS_15_19,
na.rm = TRUE)) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(TYPE~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# faceted by rural/urban status and facility type
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, TYPE) %>%
mutate(Mean = mean(Median_Age_ACS_15_19, na.rm =TRUE),
Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(TYPE~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located\n by Population Density Category and Faility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# faceted by rural/urban status and county type
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, SECURELVL) %>%
mutate(Mean = mean(Median_Age_ACS_15_19, na.rm =TRUE),
Median = median(Median_Age_ACS_15_19, na.rm = TRUE),
SECURELVL = factor(SECURELVL,
levels = c("JUVENILE",
"MINIMUM", "MEDIUM",
"MAIMUM", "CLOSE",
"NOT AVAILABLE"))) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(SECURELVL~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located\n by Population Density Category and Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of Racial Percentages for each Racial Category
# prison_blocks_race
#
# mutate(sum_all = Hispanic_ACS_15_19 + NH_AIAN_alone_ACS_15_19 + NH_Asian_alone_ACS_15_19 + NH_Blk_alone_ACS_15_19 + NH_NHOPI_alone_ACS_15_19 + NH_SOR_alone_ACS_15_19 + NH_White_alone_ACS_15_19) %>% View()
# for race reference csv, copied and pasted definitions at link here https://api.census.gov/data/2021/pdb/blockgroup/variables.html into excel file
race_ref <- read_csv("./races_reference.csv", col_names = FALSE) %>%
select(X1, X2) %>%
rename(`Racial Category` = X1,
Description = X2) %>%
mutate(Description = str_replace(Description, "in the ACS", ""),
Description = str_replace(Description, ",", ",\n")) %>%
filter(!str_detect(Description, "Census") & !str_detect(Description, "MOE") )
blockgroup_level_data %>%
select(
Hispanic_ACS_15_19, NH_AIAN_alone_ACS_15_19,
NH_Asian_alone_ACS_15_19, NH_Blk_alone_ACS_15_19,
NH_NHOPI_alone_ACS_15_19, NH_SOR_alone_ACS_15_19,
NH_White_alone_ACS_15_19, Tot_Population_ACS_15_19) %>%
mutate(across(1:ncol(.), ~ (.x / Tot_Population_ACS_15_19) * 100)) %>% # get percentage by dividing by total for that block
select(-Tot_Population_ACS_15_19) %>%
pivot_longer(1:ncol(.), names_to = "Racial Category", values_to ="Percentage") %>%
left_join(race_ref, by = c("Racial Category" = "Racial Category")) %>%
group_by(Description) %>%
mutate(Median = median(Percentage, na.rm = TRUE),
Mean = mean(Percentage, na.rm = TRUE)) %>%
ggplot(aes(x = Percentage)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
facet_wrap(~Description, scales = "free_y", ncol = 2) +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median in Red, Mean in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of Racial Percentages in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))prison_race <- prison_boundaries_block_census %>%
select(FACILITYID, TYPE, SECURELVL,
Hispanic_ACS_15_19, NH_AIAN_alone_ACS_15_19,
NH_Asian_alone_ACS_15_19, NH_Blk_alone_ACS_15_19,
NH_NHOPI_alone_ACS_15_19, NH_SOR_alone_ACS_15_19,
NH_White_alone_ACS_15_19, Tot_Population_ACS_15_19) %>%
mutate(across(4:ncol(.), ~ (.x / Tot_Population_ACS_15_19) * 100)) %>% # get percentage by dividing by total for that block
select(-Tot_Population_ACS_15_19) %>%
pivot_longer(4:ncol(.), names_to = "Racial Category", values_to ="Percentage") %>%
left_join(race_ref, by = c("Racial Category" = "Racial Category"))
prison_race %>%
group_by(Description) %>%
mutate(Median = median(Percentage, na.rm = TRUE),
Mean = mean(Percentage, na.rm = TRUE)) %>%
ggplot(aes(x = Percentage)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
facet_wrap(~Description, scales = "free_y", ncol = 2) +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of Racial Percentages in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of Percentage without Health Insurance
blockgroup_level_data %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100,
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE)) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)# faceted by facility type
prison_boundaries_block_census %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100) %>%
group_by(TYPE) %>%
mutate(
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE),
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups Where Facilities are Located\nFaceted By Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)# faceted by security level
prison_boundaries_block_census %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100) %>%
group_by(SECURELVL) %>%
mutate(
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE),
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
facet_wrap(~SECURELVL, scales = "free_y", ncol = 2) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups Where Facilities are Located\nFaceted By Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)Distribution of Percentages Below the Poverty Level
Referencing the documentation:
Prs_Blw_Pov_Lev_ACS_15_19is defined as the “Number of people classified as below the poverty level given their total family or household income within the last year, family size, and family composition in the ACS population”. To calculate the percentage, we can use thePov_Univ_ACS_15_19variable, which is defined as the “Population for whom poverty level is determined”.
#### FOR COMPARISON -- poverty distribution for all block groups in the U.S.
blockgroup_level_data %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# for block groups where facilities are located
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by facility type
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
group_by(TYPE) %>%
mutate(Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by security level
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
group_by(SECURELVL) %>%
mutate(Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)Distribution of the Percentage of Housing Units Rented
Renter_Occp_HU_ACS_15_19 is defined as “Number of ACS occupied housing units that are not owner occupied, whether they are rented or occupied without payment of rent”.
# distribution of percentage of housing units rented
blockgroup_level_data %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by facility type
prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
group_by(TYPE) %>%
mutate(Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by security level
prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
group_by(SECURELVL) %>%
mutate(Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)Distribution of Average Household Size
Tot_Prns_in_HHD_ACS_15_19is defined as the “All persons (household population) in ACS occupied housing unit. This is the numerator to calculate the average household size”. The denominator used to calculate the average household size isTot_Occp_Units_ACS_15_19.
# distribution of average household size for block groups in the U.S.
blockgroup_level_data %>%
mutate(avg_house = Tot_Prns_in_HHD_ACS_15_19 / Tot_Occp_Units_ACS_15_19,
Mean = mean(avg_house, na.rm = TRUE),
Median = median(avg_house, na.rm = TRUE)) %>%
ggplot(aes(x = avg_house)) +
geom_histogram(alpha = .9, bins = 40) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Number of People",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Average Household Size\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) + xlim(0, 8)prison_boundaries_block_census %>%
mutate(avg_house = Tot_Prns_in_HHD_ACS_15_19 / Tot_Occp_Units_ACS_15_19,
Mean = mean(avg_house, na.rm = TRUE),
Median = median(avg_house, na.rm = TRUE)) %>%
ggplot(aes(x = avg_house)) +
geom_histogram(alpha = .9, bins = 40) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Number of People",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Average Household Size\nin Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic",
hjust = .5),
plot.title = element_text(face = "bold",
hjust = 0.5)) +
xlim(0, 8)Distribution of Percentage Institutionalized
pct_Inst_GQ_CEN_2010is defined as “The percentage of the 2010 Census population who live in group quarters and are primarily ineligible, unable, or unlikely to participate in labor force while residents. Institutional group quarters include correctional facilities for adults, juvenile facilities, nursing facilities, and other institutional facilities.”
# distribution of the Percentage Living in Institutional Group Quarters across all block groups in the U.S.
blockgroup_level_data %>%
mutate(
Mean = mean(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
Median = median(pct_Inst_GQ_CEN_2010, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Inst_GQ_CEN_2010)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2010 Census data",
title = "Distribution of the Percentage Living in Institutional Group Quarters\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # distribution of the Percentage Living in Institutional Group Quarters in block groups where facilities are
prison_boundaries_block_census %>%
mutate(
Mean = mean(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
Median = median(pct_Inst_GQ_CEN_2010, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Inst_GQ_CEN_2010)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2010 Census data",
title = "Distribution of the Percentage Living in Institutional Group Quarters\nin Block Groups Where Facilities Are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5),
limits = c(-3,105)) # distribution of the Percentage Living in Institutional Group Quarters in block groups where facilities are,
# faceted by facility type
prison_boundaries_block_census %>%
group_by(TYPE) %>%
mutate(
Mean = mean(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
Median = median(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_Inst_GQ_CEN_2010)) +
geom_histogram(alpha = .9) +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2010 Census data",
title = "Distribution of the Percentage Living in Institutional Group Quarters\nin Block Groups Where Facilities Are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 20),
limits = c(-3,105)) # distribution of the Percentage Living in Institutional Group Quarters in block groups where facilities are,
# faceted by security level
prison_boundaries_block_census %>%
group_by(SECURELVL) %>%
mutate(
Mean = mean(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
Median = median(pct_Inst_GQ_CEN_2010, na.rm = TRUE),
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_Inst_GQ_CEN_2010)) +
geom_histogram(alpha = .9) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2010 Census data",
title = "Distribution of the Percentage Living in Institutional Group Quarters\nin Block Groups Where Facilities Are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 20),
limits = c(-3,105)) Distribution of the Percentage Receiving Public Assistance Income
pct_PUB_ASST_INC_ACS_15_19is the “percentage of all ACS occupied housing units that receive public assistance income (general assistance and Temporary Assistance to Needy Families).” There are two main categories of public assistance programs, social welfare programs and social insurance programs. Social welfare programs generally have low income criteria for eligibility, and include programs such as Supplemental Security Income (SSI), Supplemental Nutrition Assistance Program (SNAP), Special Supplemental Nutrition Program for Women, Infants, and Children (WIC) Temporary Assistance for Needy Families (TANF), and General Assistance (GA). Social insurance programs, meanwhile, are based on criteria outside income, such as age, employment status, or being a veteran. These include Social security (self and on behalf of a dependent child), Department of Veterans’ Affairs benefits (except Veteran’s pension), Unemployment insurance compensation, and Workers’ compensation.
# distribution of percentage receiving public assistance income in block groups in the U.S.
blockgroup_level_data %>%
mutate(
Mean = mean(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
Median = median(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_PUB_ASST_INC_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Receiving Public Assistance Income\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # distribution of percentage receiving public assistance income in block groups where facilities are located
prison_boundaries_block_census %>%
mutate(
Mean = mean(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
Median = median(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_PUB_ASST_INC_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Receiving Public Assistance Income\nin Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # distribution of percentage receiving public assistance income in block groups where facilities are located
# faceted by facility type
prison_boundaries_block_census %>%
group_by(TYPE) %>%
mutate(
Mean = mean(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
Median = median(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_PUB_ASST_INC_ACS_15_19)) +
geom_histogram(alpha = .9, bins = 40) + facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Receiving Public Assistance Income\nin Block Groups Where Facilities are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(-3,105))# distribution of percentage receiving public assistance income in block groups where facilities are located
# faceted by security level
prison_boundaries_block_census %>%
group_by(SECURELVL) %>%
mutate(
Mean = mean(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
Median = median(pct_PUB_ASST_INC_ACS_15_19, na.rm = TRUE),
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_PUB_ASST_INC_ACS_15_19)) +
geom_histogram(alpha = .9, bins = 40) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Receiving Public Assistance Income\nin Block Groups Where Facilities are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(-3,105))Distribution of Percentage Where No Adult Speaks English “Very Well”
pct_ENG_VW_ACS_15_19is defined as the “percentage of all ACS occupied housing units where no one ages 14 years and over speaks English only or speaks English ‘very well’”.
# distribution across the U.S.
blockgroup_level_data %>%
mutate(
Mean = mean(pct_ENG_VW_ACS_15_19, na.rm = TRUE),
Median = median(pct_ENG_VW_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_ENG_VW_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage Where No One Over 14 in the Household Speaks English "Very Well"\nin Block Groups in the U.S.') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) #distribution in block groups where facilities are located
prison_boundaries_block_census %>%
mutate(
Mean = mean(pct_ENG_VW_ACS_15_19, na.rm = TRUE),
Median = median(pct_ENG_VW_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_ENG_VW_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage Where No One Over 14 in the Household Speaks English "Very Well" \nin Block Groups Where Facilities are Located') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) Tract Level Analysis
Some variables are not available at the block group level, but still may be relevant. These include the percentage of the population lacking a computing device, the percentage of the popoulation with broadband internet access and a computing device, and the percentage of the population 16 years and older who are unemployed.
get_census_data_tract <- function(state_code) {
region_for_state = paste0("state:", state_code, "+county:*")
result <- tryCatch(
{
getCensus(name = "pdb/tract",
vintage = 2021,
key = "fbbc8b0d3e53089da7cabc380628d6d46ae00444",
vars = c(
"pct_Pop_NoCompDevic_ACS_15_19",
"pct_Pop_w_BroadComp_ACS_15_19",
"pct_Civ_unemp_16p_ACS_15_19"
),
regionin = region_for_state,
region = "tract:*")
} ,
error=function(e) {
return()
}
)
return(result)
}
### NOTE -- attempted trying to get the following variables as well, but they just returned NAs
# Aggregate_HH_INC_ACS_15_19
# Med_HHD_Inc_ACS_15_19
# avg_Agg_HH_INC_ACS_15_19
# Med_HHD_Inc_ACS_15_19_1
# Med_House_Value_ACS_15_19
# get tract level data for all states, only run this section if changing the variables from function above
#tract_level_data <- map_dfr(state_codes, ~get_census_data_tract(.x))
#write_csv(tract_level_data, "./tract_level_data.csv")
tract_level_data <- read_csv("./tract_level_data.csv")
prison_tract <- left_join(prison_boundaries_blocks, tract_level_data,
by = c("STATEFP10" = "state",
"COUNTYFP10" = "county",
"TRACTCE10" = "tract"))Distribution of Percentages Lacking a Computing Device
The tract level data has additional variables available, and separate documentation outlining the definitions.
The variable
pct_Pop_NoCompDevic_ACS_15_19is defined as the “Percentage of people that live in households that do not have computing device of any kind in the ACS”.
# across U.S.
tract_level_data %>%
mutate(
Mean = mean(pct_Pop_NoCompDevic_ACS_15_19, na.rm = TRUE),
Median = median(pct_Pop_NoCompDevic_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Pop_NoCompDevic_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Tracts",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of the Population Without a Computing Device\nin Tracts in the U.S.') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # for tracts where facilities are located
prison_tract %>%
mutate(
Mean = mean(pct_Pop_NoCompDevic_ACS_15_19, na.rm = TRUE),
Median = median(pct_Pop_NoCompDevic_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Pop_NoCompDevic_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of the Population Without a Computing Device\nin Tracts Where Facilities are Located') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) Distribution of Percentages With Access to Broadband Wifi
The variable
pct_Pop_w_BroadComp_ACS_15_19is defined as the “Percentage of people that live in households that have both broadband Internet access and a computing device of any kind in the ACS”.
The thresholds for upload and download speeds are set by the Federal Communications Commission, and have changed over time, shaping our view on the digital divides that exist.
# across the U.S.
tract_level_data %>%
mutate(
Mean = mean(pct_Pop_w_BroadComp_ACS_15_19, na.rm = TRUE),
Median = median(pct_Pop_w_BroadComp_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Pop_w_BroadComp_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Tracts",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of the Population With Broadband Internet Access\nin Tracts in the U.S.') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # for tracts where facilities are lcoated
prison_tract %>%
mutate(
Mean = mean(pct_Pop_w_BroadComp_ACS_15_19, na.rm = TRUE),
Median = median(pct_Pop_w_BroadComp_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Pop_w_BroadComp_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of the Population With Broadband Internet Access\nin Tracts Where Facilities are Located') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) Distribution of the Percentage Unemployed
The variable
pct_Civ_unemp_16p_ACS_15_19is defined as the “percentage of ACS civilians ages 16 years and over in the labor force that are unemployed”.
# across the U.S.
tract_level_data %>%
mutate(
Mean = mean(pct_Civ_unemp_16p_ACS_15_19, na.rm = TRUE),
Median = median(pct_Civ_unemp_16p_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Civ_unemp_16p_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Tracts",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of Civilians Ages 16+ in the Labor Force Who are Unemployed\nin Tracts in the U.S.') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105)) # in tracts where facilities are located
prison_tract %>%
mutate(
Mean = mean(pct_Civ_unemp_16p_ACS_15_19, na.rm = TRUE),
Median = median(pct_Civ_unemp_16p_ACS_15_19, na.rm = TRUE)) %>%
ggplot(aes(x = pct_Civ_unemp_16p_ACS_15_19)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = 'Distribution of the Percentage of Civilians Ages 16+ in the Labor Force Who are Unemployed\nin Tracts Where Facilities are Located') +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 5), limits = c(-3,105))